{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "import lmdb\n",
    "import numpy as np\n",
    "import pickle\n",
    "import matplotlib.pyplot as plt\n",
    "import PIL.ImageOps\n",
    "from PIL import Image\n",
    "from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit\n",
    "\n",
    "import env\n",
    "from utils import KaggleCameraDataset, progress_iter\n",
    "\n",
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## load training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_data = KaggleCameraDataset('../data/', train=True)\n",
    "additional_train_data = KaggleCameraDataset('../data/additional-train/', train=True)\n",
    "additional_val_data = KaggleCameraDataset('../data/additional-val/', train=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## calculate dataset size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "3145728\n",
      "Dataset will occupy 0.00 GB\n"
     ]
    }
   ],
   "source": [
    "crop_size = 1024\n",
    "n_crops = 1\n",
    "N = 1 # len(train_data) + len(additional_train_data) + len(additional_val_data)\n",
    "size = crop_size**2*3*n_crops*N\n",
    "print N\n",
    "print size\n",
    "print \"Dataset will occupy {0:.2f} GB\".format(size/float(2**30))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "196608\n"
     ]
    }
   ],
   "source": [
    "X = np.zeros((N, 256, 256, 3), dtype=np.uint8)\n",
    "print X.nbytes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## #1 gen data (center crops only)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b0eb916d7d624d8981fbe3bb88762fc7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "X = np.zeros((N, crop_size, crop_size, 3), dtype=np.uint8)\n",
    "\n",
    "pos = 0\n",
    "for x, _ in progress_iter(train_data, verbose=True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                w/2+crop_size/2, h/2+crop_size/2))\n",
    "    \n",
    "    X[pos, ...] = np.asarray(x, dtype=np.uint8)\n",
    "    pos += 1\n",
    "    \n",
    "y = np.asarray(train_data.y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## #1.1 save to lmdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "48480452608"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "map_size = size + N * 1024 * 64\n",
    "map_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "37d23577303140d191ec44b75d37f4b1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b84693fab0044d7e8a351e15a54a5a37",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "123d663242734161ad04e4a50857f309",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "env = lmdb.open('../data/train.lmdb', map_size=map_size)\n",
    "\n",
    "## original data\n",
    "index = 0\n",
    "for x, y in progress_iter(train_data, verbose=True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                w/2+crop_size/2, h/2+crop_size/2))\n",
    "\n",
    "    str_id = '{:06}'.format(index)\n",
    "    with env.begin(write=True) as txn:\n",
    "        txn.put(str_id, x.tobytes())\n",
    "\n",
    "    index += 1\n",
    "    \n",
    "## additional train data\n",
    "additional_train_ind = []\n",
    "for x, y in progress_iter(additional_train_data, verbose=True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                w/2+crop_size/2, h/2+crop_size/2))\n",
    "\n",
    "    str_id = '{:06}'.format(index)\n",
    "    with env.begin(write=True) as txn:\n",
    "        txn.put(str_id, x.tobytes())\n",
    "\n",
    "    additional_train_ind.append(index)\n",
    "    index += 1\n",
    "\n",
    "## additional val data\n",
    "additional_val_ind = []\n",
    "for x, y in progress_iter(additional_val_data, verbose=True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                w/2+crop_size/2, h/2+crop_size/2))\n",
    "\n",
    "    str_id = '{:06}'.format(index)\n",
    "    with env.begin(write=True) as txn:\n",
    "        txn.put(str_id, x.tobytes())\n",
    "    \n",
    "    additional_val_ind.append(index)\n",
    "    index += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'branch_pages': 1L,\n",
       " 'depth': 2L,\n",
       " 'entries': 15097L,\n",
       " 'leaf_pages': 90L,\n",
       " 'overflow_pages': 11602937L,\n",
       " 'psize': 4096L}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.stat()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "605d3194f3504231b365e347918c8228",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2976\n",
      "5491\n",
      "5515\n",
      "5595\n",
      "5693\n",
      "7271\n",
      "7449\n",
      "7742\n",
      "13176\n",
      "13380\n",
      "13559\n",
      "13651\n",
      "13743\n",
      "14207\n",
      "14342\n",
      "14550\n"
     ]
    }
   ],
   "source": [
    "env = lmdb.open('../data/train.lmdb', readonly=True)\n",
    "for index in progress_iter(range(15097), verbose=True):\n",
    "# 2976, 5491, 5515\n",
    "# index = 2976\n",
    "    with env.begin() as txn:\n",
    "        bytes = txn.get('{:06}'.format(index))\n",
    "        if len(bytes) < 1024**2*3:\n",
    "            print index\n",
    "#         x = Image.frombytes('RGB', (1024, 1024), bytes)\n",
    "    #         plt.imshow(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-14-0504baaf512b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcond\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcond\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"
     ]
    }
   ],
   "source": [
    "x = np.arange(10)\n",
    "cond = [x not in [2, 3, 5]]\n",
    "np.select(cond, np.arange(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.save('../data/additional_train_ind.npy', np.asarray(additional_train_ind))\n",
    "np.save('../data/additional_val_ind.npy', np.asarray(additional_val_ind))\n",
    "y = np.asarray(train_data.y + additional_train_data.y + additional_val_data.y)\n",
    "np.save('../data/y_train.npy', y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## #1.2 gen data and save to small blocks\n",
    "### split orig training data into 10 folds (~275 image in each)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1337)\n",
    "y_train = train_data.y\n",
    "for i, (_, val_ind) in enumerate(skf.split(y_train, y_train)):\n",
    "    n_fold = len(val_ind)\n",
    "    X_fold = np.zeros((n_fold, crop_size, crop_size, 3), dtype=np.uint8)\n",
    "    pos = 0\n",
    "    for j in progress_iter(val_ind, True):\n",
    "        x, _ = train_data[j]\n",
    "        w = x.size[0]\n",
    "        h = x.size[1]\n",
    "        x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                    w/2+crop_size/2, h/2+crop_size/2))\n",
    "        X_fold[pos] = np.asarray(x, dtype=np.uint8)\n",
    "        pos += 1\n",
    "    np.save('../data/X_{0}.npy'.format(i), X_fold)\n",
    "    np.save('../data/y_{0}.npy'.format(i), np.asarray(y_train)[val_ind])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### split additional training data into 43 folds (again ~275 each)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "skf = StratifiedKFold(n_splits=43, shuffle=True, random_state=1337)\n",
    "z = y_train_additional = additional_train_data.y\n",
    "for i, (_, ind) in enumerate(skf.split(z, z)):\n",
    "    n_fold = len(ind)\n",
    "    X_fold = np.zeros((n_fold, crop_size, crop_size, 3), dtype=np.uint8)\n",
    "    y_actual = []\n",
    "    pos = 0\n",
    "    if len(np.load('../data/y_train_{0}.npy'.format(i))) == len(np.load('../data/X_train_{0}.npy'.format(i))):\n",
    "        continue\n",
    "    for j in progress_iter(ind, True):\n",
    "        x, y = additional_train_data[j]\n",
    "        w = x.size[0]\n",
    "        h = x.size[1]\n",
    "        x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                    w/2+crop_size/2, h/2+crop_size/2))\n",
    "        x = np.asarray(x, dtype=np.uint8)\n",
    "        if len(x.shape) < 3:\n",
    "            continue\n",
    "        x = x[:,:,:3]\n",
    "        X_fold[pos] = x\n",
    "        y_actual.append(y)\n",
    "        pos += 1\n",
    "    X_fold = X_fold[:pos]\n",
    "    np.save('../data/X_train_{0}.npy'.format(i), X_fold)\n",
    "    np.save('../data/y_train_{0}.npy'.format(i), np.asarray(y_actual))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### save validation data as is (1024x1024 too)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bf76772d3b23450ca48e0bc47dd8b5f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "z_val = np.asarray(additional_val_data.y)\n",
    "c = crop_size\n",
    "X_fold = np.zeros((len(z_val), c, c, 3), dtype=np.uint8)\n",
    "pos = 0\n",
    "for x, _ in progress_iter(additional_val_data, True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-c/2, h/2-c/2,\n",
    "                w/2+c/2, h/2+c/2))\n",
    "    X_fold[pos] = np.asarray(x, dtype=np.uint8)\n",
    "    pos += 1\n",
    "np.save('../data/X_val.npy', X_fold)\n",
    "np.save('../data/y_val.npy', z_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "len(additional_val_data.y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.3 generate homogeneous blocks (train + additional train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "skf = StratifiedKFold(n_splits=100, shuffle=True, random_state=1337)\n",
    "z = np.concatenate((train_data.y, additional_train_data.y))\n",
    "X_links = train_data.X + additional_train_data.X\n",
    "\n",
    "for i, (_, ind) in progress_iter(list(enumerate(skf.split(z, z))), True):\n",
    "    n_fold = len(ind)\n",
    "    X_fold = np.zeros((n_fold, crop_size, crop_size, 3), dtype=np.uint8)\n",
    "    y_actual = []\n",
    "    pos = 0\n",
    "    for j in ind:\n",
    "        x, y = Image.open(X_links[j]), z[j]\n",
    "        w = x.size[0]\n",
    "        h = x.size[1]\n",
    "        x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                    w/2+crop_size/2, h/2+crop_size/2))\n",
    "        x = np.asarray(x, dtype=np.uint8)\n",
    "        if len(x.shape) < 3:\n",
    "            continue\n",
    "        x = x[:,:,:3]\n",
    "        X_fold[pos] = x\n",
    "        y_actual.append(y)\n",
    "        pos += 1\n",
    "    X_fold = X_fold[:pos]\n",
    "    assert len(X_fold) == len(y_actual)\n",
    "    np.save('../data/X_{0}.npy'.format(i), X_fold)\n",
    "    np.save('../data/y_{0}.npy'.format(i), np.asarray(y_actual))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## #2 gen aug data (center crops + D4 group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b3b635f39bf40cbbe8ed879bb924d98",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "X = np.zeros((N * 8, crop_size, crop_size, 3), dtype=np.uint8)\n",
    "\n",
    "pos = 0\n",
    "for x, _ in progress_iter(train_data, verbose=True):\n",
    "    w = x.size[0]\n",
    "    h = x.size[1]\n",
    "    x = x.crop((w/2-crop_size/2, h/2-crop_size/2,\n",
    "                w/2+crop_size/2, h/2+crop_size/2))\n",
    "    \n",
    "    X[pos, ...] = np.asarray(x, dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_90), dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_180), dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_270), dtype=np.uint8)\n",
    "    pos += 1\n",
    "    \n",
    "    x = PIL.ImageOps.mirror(x)\n",
    "    \n",
    "    X[pos, ...] = np.asarray(x, dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_90), dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_180), dtype=np.uint8)\n",
    "    pos += 1\n",
    "    X[pos, ...] = np.asarray(x.transpose(Image.ROTATE_270), dtype=np.uint8)\n",
    "    pos += 1\n",
    "\n",
    "y = np.asarray(train_data.y).repeat(8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## split into folds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2 3 4 6 7 8 9] [0 5]\n"
     ]
    }
   ],
   "source": [
    "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1337)\n",
    "z = np.asarray([0, 1]).repeat(5)\n",
    "fold = 3\n",
    "for i, (train_ind, test_ind) in enumerate(skf.split(z, z)):\n",
    "    if i == fold:\n",
    "        print train_ind, test_ind\n",
    "\n",
    "# new_ind = np.concatenate( [test_ind for _, test_ind in skf.split(z, z)] )\n",
    "# print new_ind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2750,)\n"
     ]
    }
   ],
   "source": [
    "new_ind = np.concatenate( [test_ind for _, test_ind in skf.split(y, y)] )\n",
    "print new_ind.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = X[new_ind]\n",
    "y = y[new_ind]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### reshape for easy extraction of train, val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5, 550, 256, 256, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = X.reshape(len(X)/5, 5, 256, 256, 3).transpose((1, 0, 2, 3, 4))\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5, 550)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = y.reshape(len(y)/5, 5).T\n",
    "y.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.save('../data/X_folds.npy', X)\n",
    "np.save('../data/y_folds.npy', y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
